import os
import numpy as np
import pandas as pd
# TensorFlow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.losses import BinaryCrossentropy, categorical_crossentropy
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import SGD
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progressbar
import progressbar
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib import cm
# Global matplotlib styling applied to every figure in this notebook.
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
# IPython magic: render figures inline below each cell.
%matplotlib inline
import warnings
# Silence library warnings to keep the notebook output readable.
warnings.filterwarnings("ignore")
This article is based on the TensorFlow image-classification tutorial, in which we demonstrate image classification using TensorFlow. The dataset used here is a filtered version of the Dogs vs. Cats dataset from Kaggle.
def Get_Data(_URL, Remove = True):
    """Download and extract the zip archive at ``_URL`` into the working directory.

    Parameters
    ----------
    _URL : str
        Direct URL of a ``.zip`` archive.
    Remove : bool, optional
        When True (default), delete the downloaded archive after extraction.

    Returns
    -------
    str
        Path of the extracted dataset directory.
    """
    # Archive file name, e.g. 'cats_and_dogs_filtered.zip'.
    File = _URL.split('/')[-1]
    Full_Name = os.path.join(os.getcwd(), File)
    # Download the archive; extract=True also unpacks it.
    path_to_zip = tf.keras.utils.get_file(fname =Full_Name, origin=_URL, extract=True, cache_dir = os.getcwd())
    # NOTE(review): assumes get_file places the extracted folder under
    # '<dir of archive>/datasets/<archive stem>' — confirm for the installed TF version.
    PATH = os.path.join(os.path.dirname(path_to_zip), 'datasets', File.split('.')[0])
    # Fix: remove via the absolute path (the old relative 'File' broke if the
    # working directory changed) and only when the archive actually exists.
    if Remove and os.path.exists(Full_Name):
        os.remove(Full_Name)
    return PATH
#-----------------------------------------------------------------
# Filtered Dogs-vs-Cats dataset hosted by Google; Get_Data downloads and extracts it.
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
PATH = Get_Data(_URL)
Downloading data from https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip 68608000/68606236 [==============================] - 1s 0us/step
def Path_Tree(PATH):
    """Pretty-print a two-level directory tree of ``PATH`` with colorama colours.

    For every set directory (e.g. train/validation) and every class directory
    inside it, print the file count, the (uppercased) extension of the first
    file, and the first five file names.
    """
    sep = ' ' * 3
    # Fix: os.path.basename is portable; the old PATH.split('\\')[-1] only
    # worked with Windows-style path separators.
    title = os.path.basename(PATH)
    print(Style.RESET_ALL + Fore.BLUE + Style.NORMAL + '=' * (len(title) +1) + Style.RESET_ALL)
    print(Back.BLACK + Fore.CYAN + Style.NORMAL + title+':'+ Style.RESET_ALL)
    print(Style.RESET_ALL + Fore.BLUE + Style.NORMAL + '=' * (len(title) +1)+ Style.RESET_ALL)
    for entry in os.listdir(PATH):
        sub = os.path.join(PATH, entry)
        if os.path.isdir(sub):
            print('└──',Back.CYAN + Fore.BLACK + Style.NORMAL + entry+':'+ Style.RESET_ALL)
            for entry1 in os.listdir(sub):
                sub1 = os.path.join(sub, entry1)
                # Fix: test the child path 'sub1' (the original re-tested 'sub',
                # so a plain file inside 'sub' would crash the listdir below).
                if os.path.isdir(sub1):
                    print(sep + '└──',Back.MAGENTA + Fore.BLACK + Style.NORMAL + entry1+':'+ Style.RESET_ALL)
                    List = os.listdir(sub1)
                    print(2* sep, Back.YELLOW + Fore.BLACK + Style.NORMAL +
                          '%i %s files' % (len(List), List[0].split('.')[-1].upper()) + Style.RESET_ALL)
                    print(2* sep, ', '.join(List[:5]) + ', ...')
#-----------------------------------------------------------------
# Print the directory tree of the downloaded dataset.
Path_Tree(PATH)
======================= cats_and_dogs_filtered: ======================= └── train: └── cats: 1000 JPG files cat.0.jpg, cat.1.jpg, cat.10.jpg, cat.100.jpg, cat.101.jpg, ... └── dogs: 1000 JPG files dog.0.jpg, dog.1.jpg, dog.10.jpg, dog.100.jpg, dog.101.jpg, ... └── validation: └── cats: 500 JPG files cat.2000.jpg, cat.2001.jpg, cat.2002.jpg, cat.2003.jpg, cat.2004.jpg, ... └── dogs: 500 JPG files dog.2000.jpg, dog.2001.jpg, dog.2002.jpg, dog.2003.jpg, dog.2004.jpg, ...
def Data_Info(PATH):
    """Walk the two-level dataset tree and summarise it.

    Parameters
    ----------
    PATH : str
        Root directory containing set folders (train/validation), each of
        which contains one folder per class.

    Returns
    -------
    DataFrame_Info : pd.DataFrame
        Columns 'Set', 'Subset', 'Size' (number of files per class folder).
    DataDirs : dict
        Maps set names ('train', ...) and set_class names ('train_cats', ...)
        to their directory paths.
    """
    Set = []
    Subset = []
    Size = []
    DataDirs = {}
    for entry in os.listdir(PATH):
        sub = os.path.join(PATH, entry)
        if os.path.isdir(sub):
            DataDirs[entry] = sub
            for entry1 in os.listdir(sub):
                sub1 = os.path.join(sub, entry1)
                # Fix: check 'sub1' (the original re-checked 'sub', so a plain
                # file inside 'sub' would be listed and crash os.listdir).
                if os.path.isdir(sub1):
                    DataDirs[entry + '_' +entry1] = sub1
                    Set.append(entry.title())
                    Subset.append(entry1.title())
                    Size.append(len(os.listdir(sub1)))
    DataFrame_Info = pd.DataFrame({'Set': Set, 'Subset': Subset, 'Size':Size})
    display(DataFrame_Info.set_index(['Set' , 'Subset']).T)
    return DataFrame_Info, DataDirs
#-----------------------------------------------------------------
# Build the summary table and the name -> path mapping of the dataset folders.
DataFrame_Info, DataDirs = Data_Info(PATH)
| Set | Train | Validation | ||
|---|---|---|---|---|
| Subset | Cats | Dogs | Cats | Dogs |
| Size | 1000 | 1000 | 500 | 500 |
# Training hyper-parameters and the target image size fed to the network.
batch_size = 128
epochs = 15
Img_Height = 150
Img_Width = 150
# Data-augmentation pipeline applied to the training set only.
image_gen_train = ImageDataGenerator(
# Rescaling the tensors from values between 0 and 255 to values between 0 and 1
rescale=1./255,
# Applying 45 degrees of rotation randomly
rotation_range=45,
# Range for random horizontal shifts.
width_shift_range=.15,
# Range for random vertical shifts.
height_shift_range=.15,
# applying random horizontal flip augmentation
horizontal_flip=True,
# Applying a zoom augmentation to the dataset to zoom images up to 50%
zoom_range=0.5
)
print(Back.WHITE + Fore.BLACK + Style.NORMAL + 'Train Data:'+ Style.RESET_ALL)
# flow_from_directory loads images from disk in batches; class labels are
# inferred from the sub-directory names (binary: cats vs. dogs).
train_data_gen = image_gen_train.flow_from_directory(batch_size=batch_size,
directory=DataDirs['train'],
shuffle=True,
target_size=(Img_Height, Img_Width),
class_mode='binary')
Train Data:
Found 2000 images belonging to 2 classes.
def plotImages(images_arr, s = 3.4, Title = False):
    """Display the images in ``images_arr`` side by side in a single row.

    Parameters
    ----------
    images_arr : sequence
        Image arrays accepted by ``plt.imshow``.
    s : float, optional
        Size in inches of each (square) subplot.
    Title : str or False, optional
        Figure title; omitted when False.
    """
    # Fix: squeeze=False guarantees a 2-D axes array even for one image, so
    # .flatten() is always valid (the original crashed when len(images_arr)==1).
    fig, axes = plt.subplots(1, len(images_arr), figsize=(s* len(images_arr),s), squeeze=False)
    axes = axes.flatten()
    for img, ax in zip( images_arr, axes):
        ax.imshow(img)
        ax.axis('off')
    _ = fig.tight_layout()
    _ = fig.subplots_adjust(wspace= 5e-3)
    if Title:
        # Removed two FontProperties objects the original created but never
        # passed to any drawing call.
        _ = fig.suptitle(Title, y = 1.05, fontsize = 18)
# Pull one batch from the training generator and show its first four images.
sample_images, _ = next(train_data_gen)
plotImages(sample_images[:4], Title = 'Four Random Pictures from the Train Sample')
# NOTE(review): indexing train_data_gen[0] re-runs the random augmentation on
# each call, so the five images below are differently augmented pictures
# (with shuffle=True the underlying batch may also reshuffle) — confirm intent.
augmented_images = [train_data_gen[0][0][0] for i in range(5)]
plotImages(augmented_images, Title = 'Original and Augmented Pictures', s = 3)
# Validation images are only rescaled — no augmentation on evaluation data.
validation_image_generator = ImageDataGenerator(rescale=1./255)
print(Back.WHITE + Fore.BLACK + Style.NORMAL + 'Validation Data:'+ Style.RESET_ALL)
val_data_gen = validation_image_generator.flow_from_directory(batch_size = batch_size,
directory = DataDirs['validation'],
target_size = (Img_Height, Img_Width),
class_mode = 'binary')
sample_images, _ = next(val_data_gen)
# Fix: these images come from the validation generator, so the title should
# say "Validation", not "Train".
plotImages(sample_images[:4], Title = 'Four Random Pictures from the Validation Sample')
Validation Data:
Found 1000 images belonging to 2 classes.
We use the Keras Sequential API to create the model.
# Sequential CNN: three Conv+MaxPool stages with dropout regularisation,
# followed by a dense classifier head.
model = Sequential(name = 'CNN')
# 16 3x3 filters; 'same' padding preserves the 150x150 spatial size.
model.add(Conv2D(16, 3, padding='same', activation='relu', input_shape=(Img_Height, Img_Width ,3)))
model.add(MaxPooling2D())
# regularization
model.add(Dropout(0.2))
model.add(Conv2D(32, 3, padding='same', activation='relu'))
model.add(MaxPooling2D())
model.add(Conv2D(64, 3, padding='same', activation='relu'))
model.add(MaxPooling2D())
# regularization
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
# Single unit with no activation: the model outputs a raw logit
# (matching the loss built with from_logits=True at compile time).
model.add(Dense(1))
model.summary()
plot_model(model, show_shapes=True, show_layer_names=False, expand_nested = True, rankdir = 'TB')
Model: "CNN" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 150, 150, 16) 448 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 75, 75, 16) 0 _________________________________________________________________ dropout (Dropout) (None, 75, 75, 16) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 75, 75, 32) 4640 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 37, 37, 32) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 37, 37, 64) 18496 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 18, 18, 64) 0 _________________________________________________________________ dropout_1 (Dropout) (None, 18, 18, 64) 0 _________________________________________________________________ flatten (Flatten) (None, 20736) 0 _________________________________________________________________ dense (Dense) (None, 512) 10617344 _________________________________________________________________ dense_1 (Dense) (None, 1) 513 ================================================================= Total params: 10,641,441 Trainable params: 10,641,441 Non-trainable params: 0 _________________________________________________________________
Compiling and fitting the model
# Adam optimizer; the loss receives raw logits because the final Dense layer
# has no activation, hence from_logits=True.
model.compile(optimizer = 'adam', loss = BinaryCrossentropy(from_logits=True), metrics = ['accuracy'])
# Fix: Model.fit_generator is deprecated (removed in recent TF 2.x);
# Model.fit accepts generators directly with identical semantics.
history = model.fit(train_data_gen,
                    # Steps so that one epoch covers the whole train/validation set.
                    steps_per_epoch = DataFrame_Info.loc[DataFrame_Info['Set'] == 'Train','Size'].sum() // batch_size,
                    epochs = epochs,
                    validation_data = val_data_gen,
                    validation_steps = DataFrame_Info.loc[DataFrame_Info['Set'] == 'Validation','Size'].sum() // batch_size)
# clear_output()
Epoch 1/15 15/15 [==============================] - 25s 2s/step - loss: 1.2661 - accuracy: 0.5091 - val_loss: 0.6930 - val_accuracy: 0.5022 Epoch 2/15 15/15 [==============================] - 15s 978ms/step - loss: 0.6923 - accuracy: 0.5011 - val_loss: 0.6924 - val_accuracy: 0.5033 Epoch 3/15 15/15 [==============================] - 15s 987ms/step - loss: 0.6922 - accuracy: 0.4995 - val_loss: 0.6920 - val_accuracy: 0.5011 Epoch 4/15 15/15 [==============================] - 15s 990ms/step - loss: 0.6920 - accuracy: 0.5059 - val_loss: 0.6907 - val_accuracy: 0.5045 Epoch 5/15 15/15 [==============================] - 15s 1s/step - loss: 0.6901 - accuracy: 0.4968 - val_loss: 0.6868 - val_accuracy: 0.4967 Epoch 6/15 15/15 [==============================] - 15s 1s/step - loss: 0.6888 - accuracy: 0.5021 - val_loss: 0.6889 - val_accuracy: 0.4978 Epoch 7/15 15/15 [==============================] - 15s 1s/step - loss: 0.6848 - accuracy: 0.5048 - val_loss: 0.6810 - val_accuracy: 0.5078 Epoch 8/15 15/15 [==============================] - 15s 1s/step - loss: 0.6822 - accuracy: 0.5037 - val_loss: 0.6730 - val_accuracy: 0.5112 Epoch 9/15 15/15 [==============================] - 15s 987ms/step - loss: 0.6764 - accuracy: 0.5112 - val_loss: 0.6807 - val_accuracy: 0.5156 Epoch 10/15 15/15 [==============================] - 15s 984ms/step - loss: 0.6704 - accuracy: 0.5417 - val_loss: 0.6572 - val_accuracy: 0.5815 Epoch 11/15 15/15 [==============================] - 15s 980ms/step - loss: 0.6681 - accuracy: 0.5417 - val_loss: 0.6482 - val_accuracy: 0.5848 Epoch 12/15 15/15 [==============================] - 15s 995ms/step - loss: 0.6571 - accuracy: 0.5646 - val_loss: 0.6377 - val_accuracy: 0.6306 Epoch 13/15 15/15 [==============================] - 15s 1s/step - loss: 0.6513 - accuracy: 0.5737 - val_loss: 0.6348 - val_accuracy: 0.6161 Epoch 14/15 15/15 [==============================] - 15s 976ms/step - loss: 0.6537 - accuracy: 0.5705 - val_loss: 0.6319 - val_accuracy: 0.6183 Epoch 
15/15 15/15 [==============================] - 14s 953ms/step - loss: 0.6501 - accuracy: 0.5780 - val_loss: 0.6252 - val_accuracy: 0.6250
def Search_List(Key, List):
    """Return, in order, the items of ``List`` whose text contains ``Key``."""
    matches = []
    for item in List:
        if Key in item:
            matches.append(item)
    return matches
# Mapping from Keras metric keys to human-readable display names.
Metrics_Names = {'loss':'Loss', 'accuracy':'Accuracy', 'mae':'MAE', 'mse':'MSE', 'recall': 'Recall'}

def Table_modify(df, Metrics_Names = Metrics_Names):
    """Rename metric columns to display names, order columns alphabetically,
    and prepend an 'Iteration' counter column (0, 1, 2, ...)."""
    renamed = df.rename(columns = Metrics_Names)
    ordered = renamed.reindex(sorted(renamed.columns), axis=1)
    ordered.insert(loc = 0, column = 'Iteration',
                   value = np.arange(len(ordered)), allow_duplicates=False)
    return ordered
# Split History.history keys into validation metrics ('val_' prefix) and
# train metrics.
Validation_Table = Search_List('val_',history.history.keys())
Train_Table = list(set( history.history.keys()) - set(Validation_Table))
# Turn the per-epoch metric lists into DataFrames (one column per metric).
Validation_Table = pd.DataFrame(np.array([history.history[x] for x in Validation_Table]).T, columns = Validation_Table)
Train_Table = pd.DataFrame(np.array([history.history[x] for x in Train_Table]).T, columns = Train_Table)
# Drop the 'val_' prefix so both tables share the same column names.
Validation_Table.columns = [x.replace('val_','') for x in Validation_Table.columns]
Train_Table = Table_modify(Train_Table)
Validation_Table = Table_modify(Validation_Table)
# Train Set Score
score = model.evaluate(train_data_gen, batch_size = batch_size, verbose = 0)
score = pd.DataFrame(score, index = model.metrics_names).T
score.index = ['Train Set Score']
# Validation Set Score
Temp = model.evaluate(val_data_gen, batch_size = batch_size, verbose = 0)
Temp = pd.DataFrame(Temp, index = model.metrics_names).T
Temp.index = ['Validation Set Score']
# Fix: DataFrame.append was removed in pandas 2.0; pd.concat is the
# supported replacement.
score = pd.concat([score, Temp])
score.rename(columns= Metrics_Names, inplace = True)
score = score.reindex(sorted(score.columns), axis=1)
# Fix: Styler.set_precision is deprecated in favor of Styler.format(precision=...).
display(score.style.format(precision=4))
| Accuracy | Loss | |
|---|---|---|
| Train Set Score | 0.5550 | 0.6500 |
| Validation Set Score | 0.6220 | 0.6272 |
def Plot_history(history, PD, Title = False, metrics_names = [x.title() for x in model.metrics_names]):
    """Plot per-iteration metric curves (left) next to a metric table (right).

    Parameters
    ----------
    history : pd.DataFrame
        Must contain an 'Iteration' column plus one column per metric name.
    PD : dict
        Plot settings: 'yLim' (y-axis upper bound), 'Table_Rows' (rows shown
        in the table, or None for all), 'tablecolumnwidth', 'TableColors'.
    Title : str or False, optional
        Figure title; omitted when False.
    metrics_names : list of str, optional
        Metric columns to plot. NOTE(review): the default is evaluated once at
        definition time from the global 'model' — it goes stale if the model
        is recompiled with different metrics; confirm this is intended.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"},{"type": "table"}]])
    # Left panel: one scatter trace per metric.
    Colors = ['OrangeRed', 'MidnightBlue', 'purple']
    for j in range(len(metrics_names)):
        fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history[metrics_names[j]].values,
                                 line=dict(color=Colors[j], width= 1.5), name = metrics_names[j]), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor= 'white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, PD['yLim']], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right panel: thin the table to ~Table_Rows evenly spaced epochs,
    # always keeping the final one.
    if PD['Table_Rows'] is not None:  # Fix: identity check instead of 'not ... == None'
        ind = np.linspace(0, history.shape[0], PD['Table_Rows'], endpoint = False).round(0).astype(int)
        ind = np.append(ind, history.index[-1])
        history = history[history.index.isin(ind)]
    T = history.copy()
    # Scientific-notation strings for compact display in the table cells.
    T[metrics_names] = T[metrics_names].applymap(lambda x: '%.4e' % x)
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:,i].values)
    TableColors = PD['TableColors']
    fig.add_trace(go.Table(header=dict(values = list(history.columns), line_color=TableColors[0],
                           fill_color=TableColors[0], align=['center','center'], font=dict(color=TableColors[1], size=12), height=25),
                           columnwidth = PD['tablecolumnwidth'], cells=dict(values=Temp, line_color=TableColors[0],
                           fill=dict(color=[TableColors[1], TableColors[1]]),
                           align=['center', 'center'], font_size=12,height=20)), 1, 2)
    if Title is not False:  # Fix: identity check instead of '!=' against False
        fig.update_layout(plot_bgcolor= 'white',
                          title={'text': Title, 'x':0.46, 'y':0.94, 'xanchor': 'center', 'yanchor': 'top'},
                          yaxis_title='Frequency')
    fig.show()
# Plot settings: rows shown in the table, y-axis limit, table column widths
# and header/cell colors.
PD = dict(Table_Rows = 25, yLim = 1.2, tablecolumnwidth = [0.3, 0.4, 0.4], TableColors = ['Navy','White'])
Plot_history(Train_Table, Title = 'Train Set', PD = PD)
Plot_history(Validation_Table, Title = 'Validation Set', PD = PD)
Here, we only trained for a few epochs; to obtain acceptable results, the model needs to be trained for many more iterations.